# -*- coding: utf-8 -*-
import scrapy

from jdjmoviespider.items import JdjmoviespiderItem


class DoubanSpiderSpider(scrapy.Spider):
    """Spider that crawls the Douban movie Top 250 listing.

    Starting from ``start_urls``, every listing page is handed to
    :meth:`parse`, which yields one ``JdjmoviespiderItem`` per movie entry
    and then schedules the next listing page, if the page links to one.
    """

    # Name that identifies this spider.
    name = 'douban_spider'
    # Domains this spider is restricted to.
    allowed_domains = ['movie.douban.com']
    # Entry URL(s) where the crawl starts.
    start_urls = ['https://movie.douban.com/top250']

    def parse(self, response):
        """Extract movie items from one listing page and follow pagination.

        Args:
            response: The downloaded listing page.

        Yields:
            A ``JdjmoviespiderItem`` for each ``<li>`` in the ``grid_view``
            list, with ``movie_source``, ``movie_name``, ``movie_url``,
            ``movie_img`` and ``movie_desc`` filled in (a field is ``None``
            when its XPath matches nothing), followed by a
            ``scrapy.Request`` for the next page when a next link exists.
        """
        movie_list = response.xpath("//div[@class='article']//ol[@class='grid_view']/li")
        for movie in movie_list:
            douban_item = JdjmoviespiderItem()
            douban_item['movie_source'] = 'douban'
            # Only the first ``title`` span is taken; later title spans are ignored.
            douban_item['movie_name'] = movie.xpath(
                ".//div[@class='info']//span[1][@class='title']/text()").extract_first()
            douban_item['movie_url'] = movie.xpath(".//div[@class='pic']//a/@href").extract_first()
            douban_item['movie_img'] = movie.xpath(".//div[@class='pic']//a/img/@src").extract_first()
            # First text node of the description paragraph, stored as-is
            # (surrounding whitespace is not stripped).
            douban_item['movie_desc'] = movie.xpath(
                ".//div[@class='info']//div[@class='bd']/p/text()").extract_first()
            yield douban_item

        next_link = response.xpath("//span[@class='next']/link/@href").extract_first()
        if next_link:
            # Resolve the href against the current page URL rather than gluing
            # it onto a hard-coded base, so query-only, path-relative and
            # absolute hrefs all produce a valid request URL.
            yield scrapy.Request(response.urljoin(next_link), callback=self.parse)
